// ---- Session setup ----------------------------------------------------------
// Start from an empty session and disable the "more" pause so the script runs
// straight through without prompting.
clear all
set more off

// Project root: the relative paths used below ("Data/...", "Figures/...")
// are resolved from this directory.
cd "~/Dropbox/Indonesia Migration/" // Specify your root directory here

// Graph appearance via the grstyle package (not built into Stata; it must be
// installed): plain scheme with grid lines.
grstyle init
grstyle set plain, grid

/*******************************************************************************
EDUCATION DISTRIBUTION 
*******************************************************************************/
// Stack every census wave (1976, then 1980 to 2010 in five-year steps) into a
// single dataset.
use "Data/Census_1976.dta", clear
forvalues t = 1980(5)2010 {
	append using "Data/Census_`t'.dta"
}

// ---- Individual characteristics ---------------------------------------------
// Plain indicators: 1 when the condition holds, 0 otherwise.
generate male = (sex == 1)
// Age with the value 999 set to missing (presumably a not-reported code;
// confirm against the codebook).
generate age_years = age if age != 999
// Education is kept only for attainment codes 1 to 4; other codes become missing.
generate education_level = edattain if inrange(edattain, 1, 4)
generate single = (marst == 1)
generate lives_alone = (hhtype == 1)

// Labour-market indicators: 1/0 where the source variable is observed, and
// missing where the source variable itself is missing.
generate LFP = inrange(empstat, 1, 2) if !missing(empstat)
generate emp = inrange(classwk, 1, 4) if !missing(classwk)
generate wage = (classwk == 2) if !missing(classwk)
generate SE = (classwk == 1) if !missing(classwk)
generate unpaid = (classwk == 3) if !missing(classwk)
generate urban_dist = (urban == 2)

// ---- Variable labels --------------------------------------------------------
label variable male "Proportion male"
label variable age_years "Average age"
label variable education_level "Level of education"
label variable single "Proportion never married"
label variable lives_alone "One-member househoold"
label variable LFP "Labour force participation"
label variable emp "Employed"
label variable wage "Wage employment"
label variable SE "Self-employment"
label variable unpaid "Unpaid work"
label variable urban_dist "Lives in urban area"

// ---- Figure: education distribution, natives vs migrants --------------------
// Two overlaid discrete histograms: filled grey bars for migstat == 0 and
// red outlined bars for migstat equal to 1 or 2.
twoway (histogram education_level if migstat == 0, discrete lcolor(gs12) fcolor(gs12)) ///
	(histogram education_level if inlist(migstat, 1, 2), discrete fcolor(none) lcolor(red)), ///
	legend(order(1 "Natives" 2 "Migrants") row(1)) xtitle("Education level")
graph export "Figures/Education_distribution.pdf", as(pdf) replace
  
  
/*******************************************************************************
			Serial correlation between migration inflows					
*******************************************************************************/
// For each census wave from 1980 to 2010, plot the new-migrant population share
// against its own lag, overlay a linear fit with confidence band, and print the
// fitted regression equation as a note under the graph.
use				"Data/Merged", clear
// l. is a time-series operator, so this relies on panel settings stored with
// the Merged file. NOTE(review): confirm the file was saved after xtset.
gen 			lagged_per_immig_cst_5Y = l.per_immig_cst_5Y
forval 			i = 1980(5)2010 {
	// Previous wave used to select observations. The original wrote i-5 inside
	// macro quotes, which Stata reads as an undefined macro named i-5 (empty),
	// not as arithmetic, leaving a malformed inlist() call.
	// NOTE(review): assumes the first wave is stored as 1975 in year - confirm.
	local		prev = `i' - 5
	// Previous wave as displayed in titles: the first census is from 1976.
	local		j = `prev'
	if `i' == 1980	local j = `i' - 4

	reg			per_immig_cst_5Y lagged_per_immig_cst_5Y if inlist(year,`prev',`i')

	// Build the equation text piece by piece from the stored estimates.
	local 		eq `"Percent migrants{subscript:d,`i'} ="'						// start with the dependent variable
	local 		eq "`eq' `: di  %4.2f _b[_cons]'"								// constant, two decimals
	local 		eq `"`eq' `=cond(_b[lagged_per_immig_cst_5Y]>0, "+", "-")'"'	// sign of the slope
	// The sign is already written above, so print the absolute value of the slope
	local 		eq `"`eq' `:di %4.2f abs(_b[lagged_per_immig_cst_5Y])' Percent migrant{subscript:d,`j'}"'
	local 		eq `"`eq' + {&epsilon}{subscript:d,t}"'							// error term
	local 		eq "`eq'; R{superscript:2} = `: di  %4.2f e(r2)'"				// R-squared

	tw			(scatter per_immig_cst_5Y lagged_per_immig_cst_5Y if inlist(year,`prev',`i')) ///
				(lfitci per_immig_cst_5Y lagged_per_immig_cst_5Y if inlist(year,`prev',`i')), note("`eq'") /// equation shown as a note
				xtitle("New migrant population share in `j'") ///
				ytitle("New migrant population share in `i'") ///
				xlabel(0(5)30) ylabel(0(5)30) legend(off)
	graph 		export "Figures/Migration_correlation_`i'.pdf", as(pdf) replace
}


/*******************************************************************************
Serial correlation between the origin-share mix of migrants across survey years
*******************************************************************************/
// For each census wave from 1980 to 2010, plot every origin province's share of
// all movers against its share in the previous wave, with a 45-degree
// reference line.
use				"Data/Census_1976.dta", clear
foreach			t in 1980 1985 1990 1995 2000 2005 2010 {
	append		using "Data/Census_`t'.dta"
}

// Keep only observations with migtype == 1, then compute weighted mover totals
// per year and per origin-year, and collapse to one row per origin-year.
drop			if migtype != 1
bys				year: egen total_outflow = total(perwt)
bys				origin year: egen movers = total(perwt)
gen				share_movers = movers/total_outflow
duplicates		drop year origin , force

// Province names for the origin codes; they appear as marker labels in the plots.
// NOTE(review): 360073 is labelled South and West Sulawesi and 360074 South
// Sulawesi; the latter may be meant to be Southeast Sulawesi - verify against
// the codebook before publishing the figures.
label define 	ORIGIN 360011 "Aceh" 360012 "North Sumatra" 360013 "West Sumatra" ///
				360014 "Riau" 360015 "Jambi" 360016 "South Sumatra & Bangka Belitung" ///
				360017 "Bengkulu" 360018 "Lampung" 360031 "Jakarta" 360032 "West Java and Banten" ///
				360033 "Central Java" 360034 "Yogyakarta" 360035 "East Java" 360051 "Bali" ///
				360052 "West Nusa Tenggara" 360053 "East Nusa Tenggara" 360054 "East Timor" ///
				360061 "West Kalimantan" 360062 "Central Kalimantan" 360063 "South Kalimantan" ///
				360064 "East Kalimantan" 360071 "North Sulawesi and Gorontolo" 360072 "Central Sulawesi" ///
				360073 "South and West Sulawesi" 360074 "South Sulawesi" 360081 "Maluku and North Maluku" ///
				360094 "Papua and West Papua" 360098 "Unknown" 360099 "NIU"
label values	origin ORIGIN

// Recode the 1976 wave to 1975 so that waves are evenly spaced five years
// apart, as required by delta(5) for the lag operator.
replace 		year = 1975 if year ==1976
xtset           origin year, delta(5)
generate		lagged_share_movers = l.share_movers

forval 			i = 1980(5)2010 {
	// Previous wave as coded in year after the recode above. The original wrote
	// i-5 inside macro quotes, which Stata reads as an undefined macro named
	// i-5 (empty), not as arithmetic, leaving a malformed inlist() call.
	local		prev = `i' - 5
	// Previous wave as displayed in titles: the first census is from 1976.
	local		j = `prev'
	if `i' == 1980	local j = `i' - 4

	// No note() here: the original passed the eq local, which is never set in
	// this section and would carry over the regression equation from the
	// migration-inflow figures.
	tw			(scatter share_movers lagged_share_movers if inlist(year,`prev',`i'), mlabel(origin)) ///
				(line share_movers share_movers if inlist(year,`prev',`i')), /// 45-degree reference line
				xtitle("Share of new migrants from each province in `j'") ///
				ytitle("Share of new migrants from each province in `i'") ///
				xlabel(0(0.05)0.3) ylabel(0(0.05)0.3) ///
				legend(off)
	graph 		export "Figures/Origin_correlation_`i'.pdf", as(pdf) replace
}
			
/*******************************************************************************
Instrument
*******************************************************************************/
// First instrument figure: actual new-migrant share against the predicted
// share (pred_per_immig_cst_5Y_5Y), with a linear fit and the estimated
// equation printed as a note.
use "Data/Merged", clear

regress per_immig_cst_5Y pred_per_immig_cst_5Y_5Y

// Format the pieces of the equation from the stored estimates.
local cons  : display %4.2f _b[_cons]
local slope : display %4.2f abs(_b[pred_per_immig_cst_5Y_5Y])
local r2    : display %4.2f e(r2)
// The slope is printed in absolute value, so its sign is written separately.
local sign = cond(_b[pred_per_immig_cst_5Y_5Y] > 0, "+", "-")

// Assemble the full equation text in one step.
local eq `"Actual % migrants{subscript:dt} = `cons' `sign' `slope' Predicted % migrant{subscript:dt} + {&epsilon}{subscript:d,t}; R{superscript:2} = `r2'"'

twoway (scatter per_immig_cst_5Y pred_per_immig_cst_5Y_5Y) ///
	(lfitci per_immig_cst_5Y pred_per_immig_cst_5Y_5Y), ///
	note("`eq'") ///
	xtitle("Predicted population share of new migrant in the district in year t") ///
	ytitle("Actual population share in year t") ///
	legend(off)
graph export "Figures/IV_1", as(pdf) replace


// Second instrument figure: actual new-migrant share against the alternative
// predicted share (pred_per_immig_cst_10Y_5Y), with a linear fit and the
// estimated equation printed as a note. Uses the dataset already in memory.
reg				per_immig_cst_5Y pred_per_immig_cst_10Y_5Y
local 			eq `"Actual % migrants{subscript:dt} ="'						// start with the dependent variable
local 			eq "`eq' `: di  %4.2f _b[_cons]'"								// constant, two decimals
local 			eq `"`eq' `=cond(_b[pred_per_immig_cst_10Y_5Y]>0, "+", "-")'"'	// sign of the slope
// The sign is already written above, so print the absolute value of the slope
local 			eq `"`eq' `:di %4.2f abs(_b[pred_per_immig_cst_10Y_5Y])' Predicted % migrant{subscript:dt}"'
local 			eq `"`eq' + {&epsilon}{subscript:d,t}"'							// error term
local 			eq "`eq'; R{superscript:2} = `: di  %4.2f e(r2)'"				// R-squared
// Plot the same regressor that was used in the regression above. The original
// plotted pred_per_immig_cst_5Y_5Y here, so the scatter and fitted line did not
// correspond to the equation shown in the note.
tw				(scatter per_immig_cst_5Y pred_per_immig_cst_10Y_5Y) ///
				(lfitci per_immig_cst_5Y pred_per_immig_cst_10Y_5Y), note("`eq'") /// equation shown as a note
				xtitle("Predicted population share of new migrant in the district in year t") ///
				ytitle("Actual population share in year t") ///
				legend(off)
graph 			export "Figures/IV_2", as(pdf) replace







